# This script contains the code for the analysis that underpins the first article I wrote on linkedin
# Link to code
# load libraries - these are very standard R libraries see https://www.tidyverse.org/
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readxl)
# load the data - downloaded from https://www.gridwatch.templar.co.uk/download.php
# filter by date so that subsequent analysis is consistent
dat <- readr::read_csv('~/Downloads/gridwatch.csv') %>%
filter(timestamp < '2022-05-12', timestamp > '2012-01-01')
## Rows: 1150600 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (23): id, demand, frequency, coal, nuclear, ccgt, wind, pumped, hydro, ...
## dttm (1): timestamp
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# check out the data - one record every 5 minutes, columns for each data source, 1.08M rows
dat
## # A tibble: 1,087,726 × 24
## id timestamp demand frequency coal nuclear ccgt wind pumped
## <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 62694 2012-01-01 00:00:01 30590 50.1 8693 7121 8568 2740 0
## 2 62695 2012-01-01 00:05:06 30490 50.0 8650 7120 8441 2812 0
## 3 62696 2012-01-01 00:10:01 30802 50 8880 7125 8427 2896 0
## 4 62697 2012-01-01 00:15:01 31180 50.0 9111 7122 8494 2964 0
## 5 62698 2012-01-01 00:20:01 31241 50.0 9195 7114 8449 2992 0
## 6 62699 2012-01-01 00:25:10 31340 50.0 9270 7090 8449 3040 0
## 7 62700 2012-01-01 00:30:01 31255 50.0 9164 7060 8490 3050 0
## 8 62701 2012-01-01 00:35:05 31054 50.1 8999 7034 8471 3059 0
## 9 62702 2012-01-01 00:40:14 31166 50.0 8887 7004 8426 3085 271
## 10 62703 2012-01-01 00:45:01 31232 50.0 8879 6982 8496 3100 283
## # … with 1,087,716 more rows, and 15 more variables: hydro <dbl>,
## # biomass <dbl>, oil <dbl>, solar <dbl>, ocgt <dbl>, french_ict <dbl>,
## # dutch_ict <dbl>, irish_ict <dbl>, ew_ict <dbl>, nemo <dbl>, other <dbl>,
## # north_south <dbl>, scotland_england <dbl>, ifa2 <dbl>, nsl <dbl>
nrow(dat)
## [1] 1087726
# we don't really need data at 5 minute intervals, so summarise data to hourly
# we also don't really need all of the power sources at this point
# find median of each 1 hour interval
# this also makes conversion from MW to MWh very simple
dat_1hr <- dat %>%
dplyr::select(timestamp, demand, wind, solar, nuclear, ccgt) %>%
dplyr::mutate(ts_hour = lubridate::floor_date(timestamp, 'hours')) %>%
dplyr::group_by(ts_hour) %>%
dplyr::summarise_if(is.numeric, median, na.rm = TRUE) %>%
dplyr::ungroup() %>%
dplyr::mutate_if(
is.numeric,
~ifelse(is.na(.), 0, .)
)
#now have a nuch smaller dataset - 90k rows
dat_1hr
## # A tibble: 90,713 × 6
## ts_hour demand wind solar nuclear ccgt
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2012-01-01 00:00:00 31183 3045 0 7075 8460.
## 2 2012-01-01 01:00:00 30411 2984. 0 6942 8422
## 3 2012-01-01 02:00:00 29320 2928. 0 6944. 8280
## 4 2012-01-01 03:00:00 27427 2970 0 6946. 7957
## 5 2012-01-01 04:00:00 26416. 3004. 0 7014 7560.
## 6 2012-01-01 05:00:00 25822. 2926 0 7116 7210
## 7 2012-01-01 06:00:00 26032 2982. 0 7118 7307
## 8 2012-01-01 07:00:00 25558. 2718. 0 7123 7292.
## 9 2012-01-01 08:00:00 24696. 2644. 0 7124 6989
## 10 2012-01-01 09:00:00 26762. 2643 0 7124. 7314.
## # … with 90,703 more rows
nrow(dat_1hr)
## [1] 90713
# get some intuition about the data
dat_1hr %>%
dplyr::filter(ts_hour > '2019-01-01', ts_hour < '2019-12-31') %>%
ggplot(aes(ts_hour, demand)) + geom_line() + ylim(0,50000) + theme_bw()

dat_1hr %>%
dplyr::filter(ts_hour > '2019-03-01', ts_hour < '2019-03-16') %>%
ggplot(aes(ts_hour, demand)) + geom_line() + ylim(0,50000) + theme_bw()

# get some intuition about the data- what does a year look like with demand and supply?
dat_1hr %>%
dplyr::filter(ts_hour > '2019-01-01', ts_hour < '2019-12-31') %>%
ggplot(aes(ts_hour, demand, color = 'black')) +
geom_line() +
geom_line(aes(y=ccgt), color = 'magenta', alpha = 0.3) +
geom_line(aes(y=solar), color = 'orange', alpha = 0.5) +
geom_line(aes(y=wind), color = 'blue', alpha = 0.8) +
geom_line(aes(y=nuclear), color = 'darkgrey') +
ylim(0,50000) +
xlab('') + ylab('Generation/Demand MWh') +
theme_bw() +
scale_colour_manual(name = '', guide = 'legend',
values =c(
'demand' = 'black',
'wind'='blue',
'solar'='orange',
'nuclear' = 'darkgrey',
'ccgt' = 'magenta'))

# what about a month
dat_1hr %>%
dplyr::filter(ts_hour > '2019-03-01', ts_hour < '2019-03-16') %>%
ggplot(aes(ts_hour, demand, color = 'black')) +
geom_line() +
geom_line(aes(y=wind), color = 'blue') +
geom_line(aes(y=solar), color = 'orange') +
geom_line(aes(y=nuclear), color = 'darkgrey') +
geom_line(aes(y=ccgt), color = 'magenta') +
ylim(0,50000) +
xlab('') + ylab('Generation/Demand MWh') +
theme_bw() +
scale_colour_manual(name = '', guide = 'legend',
values =c(
'demand' = 'black',
'wind'='blue',
'solar'='orange',
'nuclear' = 'darkgrey',
'ccgt' = 'magenta'))

# and finally - what about a low wind month?
dat_1hr %>%
dplyr::filter(ts_hour > '2022-03-01', ts_hour < '2022-04-01') %>%
ggplot(aes(ts_hour, demand, color = 'black')) +
geom_line() +
geom_line(aes(y=wind), color = 'blue') +
geom_line(aes(y=solar), color = 'orange') +
geom_line(aes(y=nuclear), color = 'darkgrey') +
geom_line(aes(y=ccgt), color = 'magenta') +
ylim(0,50000) +
xlab('') + ylab('Generation/Demand MWh') +
theme_bw() +
scale_colour_manual(name = '', guide = 'legend',
values =c(
'demand' = 'black',
'wind'='blue',
'solar'='orange',
'nuclear' = 'darkgrey',
'ccgt' = 'magenta'))

# note that 'demand' in gridwatch data is affected by unmetered wind a solar
# this shows up as reductions in demand - eg household solar reduces that household's requirements
# so far we have seen that demand is variable - day/night, weekend vs weekday, winter vs summer.
# solar and wind are also variable, but gas seems to be able to ramp up and down around demand/RE variablility
# the next step of the analysis is to see what would happen with different grids
# to do this we need to normalise the wind and solar output - ie what would it have looked like if we
# hadn't been building more over time.
# simplest way to do this is to use the cumulative maximum for wind and solar
# find max of each 1 hour interval and then cumulative max for wind and solar
max_vargen <- dat %>%
dplyr::select(timestamp, demand, wind, solar) %>%
dplyr::filter(solar < 100000) %>% # gets rid of two rogue rows
dplyr::mutate(ts_hour = lubridate::floor_date(timestamp, 'hours')) %>%
dplyr::group_by(ts_hour) %>%
dplyr::summarise_if(is.numeric, max, na.rm = TRUE) %>%
dplyr::ungroup() %>%
dplyr::mutate_if(
is.numeric,
~ifelse(is.na(.), 0, .)
) %>%
mutate(max_wind = cummax(wind),
max_solar = cummax(solar)) %>%
dplyr::select(ts_hour, max_wind, max_solar)
max_vargen
## # A tibble: 90,713 × 3
## ts_hour max_wind max_solar
## <dttm> <dbl> <dbl>
## 1 2012-01-01 00:00:00 3100 0
## 2 2012-01-01 01:00:00 3116 0
## 3 2012-01-01 02:00:00 3116 0
## 4 2012-01-01 03:00:00 3116 0
## 5 2012-01-01 04:00:00 3116 0
## 6 2012-01-01 05:00:00 3116 0
## 7 2012-01-01 06:00:00 3116 0
## 8 2012-01-01 07:00:00 3116 0
## 9 2012-01-01 08:00:00 3116 0
## 10 2012-01-01 09:00:00 3116 0
## # … with 90,703 more rows
# combine median and max wind/solar so we can calculate % wind/solar
# this will be an overestimate of % since we will be likely be undestimating wind/solar capacity
# but in addition mix and location of wind has changed over time -> high capacity factors
# lets us normalise wind and solar output against max capacity available at time
normalised_vargen <- dat_1hr %>%
inner_join(max_vargen, by='ts_hour') %>%
mutate(pct_wind = wind/max_wind,
pct_solar = solar/max_solar)
#plot normalised wind output
ggplot(normalised_vargen %>% dplyr::filter(ts_hour > '2013-01-01'), aes(ts_hour, wind)) +
geom_line(aes(y=max_wind), color = 'black', linetype = 'dashed') +
geom_line(color='black') +
theme_bw() +
xlab('') + ylab('Generation MWh') +
theme_bw()

ggplot(normalised_vargen %>% dplyr::filter(ts_hour > '2013-01-01'), aes(ts_hour, pct_wind)) +
geom_line() + theme_bw()

# plot normalised solar
ggplot(normalised_vargen %>% dplyr::filter(ts_hour > '2017-04-01'), aes(ts_hour, solar)) +
geom_line(aes(y=max_solar)) +
geom_line() +
theme_bw()

ggplot(normalised_vargen %>% dplyr::filter(ts_hour > '2017-04-01'), aes(ts_hour, pct_solar)) + geom_line() + theme_bw()

# model 1 - simple model with just wind, gas, solar, no capacity constraints or overgeneration
#specify wind, solar and gas capacity
wind_cap_m1 <- 20000
gas_cap_m1 <- 50000
solar_cap_m1 <- 10000
# scaleup wind and solar to model capacity, then let gas fill in the gaps
model1 <- normalised_vargen %>%
dplyr::filter(ts_hour > '2017-04-01') %>%
dplyr::select(ts_hour, demand, pct_wind, pct_solar) %>%
dplyr::mutate(solar = pct_solar * solar_cap_m1,
wind = pct_wind * wind_cap_m1,
gas = demand - wind - solar)
model1 %>%
dplyr::filter(ts_hour > '2022-03-01', ts_hour < '2022-04-01') %>%
ggplot(aes(ts_hour, demand, color = 'black')) +
geom_line() +
geom_line(aes(y=wind), color = 'blue') +
geom_line(aes(y=solar), color = 'orange') +
geom_line(aes(y=gas), color = 'magenta') +
ylim(0,50000) +
xlab('') + ylab('Generation/Demand MWh') +
theme_bw() +
scale_colour_manual(name = '', guide = 'legend',
values =c(
'demand' = 'black',
'wind'='blue',
'solar'='orange',
'nuclear' = 'darkgrey',
'gas' = 'magenta'))

# model 2 - add curtailment and excess
# need this model to look at what happens when we add more wind to the system.
# what happens when we have 'too much' wind for system to cope with?
# reach point of diminishing returns- most of new wind output curtailed but still have wind lull problem
# EV's could help by soaking up excess, interconnectors could export/import
# but this is the point where storage starts to help us use more of our otherwise curtailed wind
wind_cap_m2 <- 60000
gas_cap_m2 <- 50000
solar_cap_m2 <- 20000
model2 <- normalised_vargen %>%
dplyr::filter(ts_hour > '2017-04-01') %>%
dplyr::select(ts_hour, demand, pct_wind, pct_solar) %>%
dplyr::mutate(solar = pct_solar * solar_cap_m2,
wind = pct_wind * wind_cap_m2,
gas = pmax(pmin(demand - wind - solar, gas_cap_m2), 0),
deficit = pmax(demand - wind - solar - gas, 0),
curtailment = pmax(wind + solar + gas - demand, 0))
model2 %>%
dplyr::filter(ts_hour > '2021-01-01', ts_hour < '2022-01-01') %>%
ggplot(aes(ts_hour, demand, color = 'black')) +
geom_line() +
geom_line(aes(y=wind), color = 'blue') +
geom_line(aes(y=solar), color = 'orange') +
geom_line(aes(y=gas), color = 'magenta') +
geom_line(aes(y=-curtailment), color = 'red') +
xlab('') + ylab('Generation/Demand MWh') +
theme_bw() +
scale_colour_manual(name = '', guide = 'legend',
values =c(
'demand' = 'black',
'wind'='blue',
'solar'='orange',
'nuclear' = 'darkgrey',
'gas' = 'magenta',
'curtailment' = 'red'))

model2 %>%
dplyr::filter(ts_hour > '2021-01-01', ts_hour < '2022-01-01') %>%
dplyr::summarise_at(c('wind', 'solar', 'gas', 'curtailment', 'deficit'), ~sum(., na.rm = TRUE)/1000000)
## # A tibble: 1 × 5
## wind solar gas curtailment deficit
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 207. 21.6 77.0 45.8 0
model2 %>%
dplyr::filter(ts_hour > '2022-03-01', ts_hour < '2022-04-01') %>%
ggplot(aes(ts_hour, demand, color = 'black')) +
geom_line() +
geom_line(aes(y=wind), color = 'blue') +
geom_line(aes(y=solar), color = 'orange') +
geom_line(aes(y=gas), color = 'magenta') +
geom_line(aes(y=-curtailment), color = 'red') +
xlab('') + ylab('Generation/Demand MWh') +
theme_bw() +
scale_colour_manual(name = '', guide = 'legend',
values =c(
'demand' = 'black',
'wind'='blue',
'solar'='orange',
'nuclear' = 'darkgrey',
'gas' = 'magenta',
'curtailment' = 'red'))

# now let's try to do the same thing but across a range of wind capacity
model2_fn <- function(df, start_date, end_date, wind_cap, gas_cap, solar_cap) {
df %>%
dplyr::filter(ts_hour >= start_date, ts_hour <= end_date) %>%
dplyr::select(ts_hour, demand, pct_wind, pct_solar) %>%
dplyr::mutate(solar = pct_solar * solar_cap,
wind = pct_wind * wind_cap,
gas = pmax(pmin(demand - wind - solar, gas_cap), 0),
deficit = pmax(demand - wind - solar - gas, 0),
curtailment = pmax(wind + solar + gas - demand, 0)) %>%
dplyr::summarise_at(c('wind', 'solar', 'gas', 'curtailment', 'deficit'), ~sum(., na.rm = TRUE)/1000000)
}
normalised_vargen %>%
model2_fn(
start_date = '2017-01-01',
end_date = '2021-12-31',
wind_cap = 20000,
gas_cap = 50000,
solar_cap = 20000)
## # A tibble: 1 × 5
## wind solar gas curtailment deficit
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 369. 117. 846. 1.31 0
# do a full simulation with different amounts of wind and gas
sim_df <- tibble(
input_data = list(normalised_vargen),
start_date = '2017-01-01',
end_date = '2021-12-31',
solar_cap = 20000
) %>% tidyr::crossing(
wind_cap = c(10000,20000, 40000, 80000, 120000),
gas_cap = c(20000, 35000, 50000)
) %>%
dplyr::mutate(
sim_out = purrr::pmap(
.f=model2_fn,
.l=list(
df=input_data,
start_date=start_date,
end_date=end_date,
wind_cap=wind_cap,
solar_cap=solar_cap,
gas_cap=gas_cap
)
)
) %>%
tidyr::unnest(sim_out)
sim_df
## # A tibble: 15 × 11
## input_data start_date end_date solar_cap wind_cap gas_cap wind solar gas
## <list> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 <tibble> 2017-01-01 2021-12-31 20000 10000 20000 185. 117. 808.
## 2 <tibble> 2017-01-01 2021-12-31 20000 10000 35000 185. 117. 1016.
## 3 <tibble> 2017-01-01 2021-12-31 20000 10000 50000 185. 117. 1029.
## 4 <tibble> 2017-01-01 2021-12-31 20000 20000 20000 369. 117. 713.
## 5 <tibble> 2017-01-01 2021-12-31 20000 20000 35000 369. 117. 840.
## 6 <tibble> 2017-01-01 2021-12-31 20000 20000 50000 369. 117. 846.
## 7 <tibble> 2017-01-01 2021-12-31 20000 40000 20000 738. 117. 465.
## 8 <tibble> 2017-01-01 2021-12-31 20000 40000 35000 738. 117. 523.
## 9 <tibble> 2017-01-01 2021-12-31 20000 40000 50000 738. 117. 525.
## 10 <tibble> 2017-01-01 2021-12-31 20000 80000 20000 1476. 117. 231.
## 11 <tibble> 2017-01-01 2021-12-31 20000 80000 35000 1476. 117. 252.
## 12 <tibble> 2017-01-01 2021-12-31 20000 80000 50000 1476. 117. 253.
## 13 <tibble> 2017-01-01 2021-12-31 20000 120000 20000 2214. 117. 141.
## 14 <tibble> 2017-01-01 2021-12-31 20000 120000 35000 2214. 117. 152.
## 15 <tibble> 2017-01-01 2021-12-31 20000 120000 50000 2214. 117. 152.
## # … with 2 more variables: curtailment <dbl>, deficit <dbl>
# what does wind, solar, gas generaiton look like
sim_df %>%
dplyr::select(wind_cap, gas_cap, wind, solar, gas) %>%
tidyr::pivot_longer(cols = c('wind', 'solar', 'gas'), names_to = 'source', values_to = 'TWh') %>%
ggplot(aes(as.factor(wind_cap/1000), TWh, fill=source)) +
geom_bar(stat='identity') +
facet_wrap(~gas_cap/1000, labeller = labeller(.default = function(val) {paste0(val, ' GW Gas')})) +
xlab('Total Wind Capacity (GW)') +
theme_bw() +
scale_fill_manual(name = '', guide = 'legend',
values =c(
# 'demand' = 'black',
'wind'='blue',
'solar'='orange',
# 'nuclear' = 'darkgrey',
'gas' = 'magenta'
# 'curtailment' = 'red',
# 'deficit' = 'purple'
))

#what about deficit and curtailment
sim_df %>%
dplyr::transmute(wind_cap, gas_cap, curtailment, deficit = -deficit) %>%
tidyr::pivot_longer(cols = c('curtailment', 'deficit'), names_to = 'source', values_to = 'TWh') %>%
ggplot(aes(as.factor(wind_cap/1000), TWh, fill=source)) +
geom_bar(stat='identity') +
facet_wrap(~gas_cap/1000, labeller = labeller(.default = function(val) {paste0(val, ' GW Gas')})) +
xlab('Total Wind Capacity (GW)') +
theme_bw() +
scale_fill_manual(name = '', guide = 'legend',
values =c(
# 'demand' = 'black',
#'wind'='blue',
#'solar'='orange',
# 'nuclear' = 'darkgrey',
#'gas' = 'magenta'
'curtailment' = 'red',
'deficit' = 'purple'
))

# finally consider amount of gas, deficit and curtailment
sim_df %>%
dplyr::transmute(wind_cap, gas_cap, curtailment, gas, deficit=-deficit) %>%
tidyr::pivot_longer(cols = c('curtailment', 'gas', 'deficit'), names_to = 'source', values_to = 'TWh') %>%
ggplot(aes(as.factor(wind_cap/1000), TWh, fill=source)) +
geom_bar(stat='identity') +
facet_wrap(~gas_cap/1000, labeller = labeller(.default = function(val) {paste0(val, ' GW Gas')})) +
xlab('Total Wind Capacity (GW)') +
theme_bw() +
scale_fill_manual(name = '', guide = 'legend',
values =c(
# 'demand' = 'black',
#'wind'='blue',
#'solar'='orange',
# 'nuclear' = 'darkgrey',
'gas' = 'magenta',
'curtailment' = 'red',
'deficit' = 'purple'
))
